clear all
set more off, perm
set mem 10000000
set matsize 10000
version 13

************************************************************** 
*** DD regressions: reduced-form Economic Census outcomes ****
************************************************************** 

** Set file paths
do "$path_code/paths.do"

** Set graph scheme
cd "$path/code/analyze"
set scheme fb, perm

********************************************************************************
********************************************************************************

** 1. Run difference in discontinuities for Economic Census
{
use "$panel/panel_dataset_dd.dta", clear
keep if vplan4<11 & corr_state==1 & sample==1 & sample_h==1 & sing_h==1 & pop_non_zero==1 
drop p_300_X-band300_p_200
drop bin25_p-within_border_100k
drop vplan vplan2 vplan3 vdpr vdpr2 vdpr3
drop treat_x_post dd_* stateyear
drop lights_mean_hat lights_max_hat
drop no_hh tot_p tot_m tot_f vd01_id conc_id v_id_hpca11 vd11_id c_code01 v_id_master ///
	h?_id h?v_id f_area totp_h? count_h? maxp_h? grad_max grad_mean

merge 1:1 pca01_id st_code dt_code vplan4 vdpr4 year using ///
	"$panel/panel_dataset_lights_allyears.dta", keep(3) nogen
egen temp1 = max(lights_max*(year==2001)), by(pca01_id)
egen temp2 = max(lights_max*(year==2011)), by(pca01_id)
gen lights_diff = abs(temp2-temp1)
assert lights_diff!=.
	
egen stdtbk = group(stdt bk_code)
gen X = (tot_p01-300)
assert X!=.
gen D = X>=0
gen DX = D*X

gen aw150 = abs(150-abs(300-tot_p01))
gen aw100 = abs(100-abs(300-tot_p01))

	// Merge in economic census panel
preserve
use "$panel/ec_shrid_pc01_panel.dta", clear	
tab count_dup_vill_ec, missing
drop if count_dup_vill_ec==1
tab count_dup_shrid, missing
drop if count_dup_shrid!=0
tab flag_ec
drop if flag_ec==1
keep pca01_id ec_*
rename ec_year year
tempfile ec
save `ec'
restore
merge 1:1 pca01_id year using `ec'

	// Drop EC villages not in RD sample
egen temp = min(_merge), by(pca01_id)
keep if temp==1	
tab year
drop temp

	// Populate variables for regression
foreach v of varlist D X DX tot_p01 aw150 aw100 stdt stdtbk st_code	pop_mismatch20 lights_diff {
	egen temp = mode(`v'), by(pca01_id)
	replace `v' = temp if `v'==.
	assert `v'!=.
	drop temp
}

gen ec_years = ""
gen yvar = ""
gen fes = ""
gen vce = ""
gen bw = .
gen beta13 = .
gen se13 = .
gen tscore13 = .
gen pvalue13 = .
gen ci95_lo13 = .
gen ci95_hi13 = .
gen beta98 = .
gen se98 = .
gen tscore98 = .
gen pvalue98 = .
gen ci95_lo98 = .
gen ci95_hi98 = .
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
	
local vce = "cluster stdtbk"
local row = 1

	// Diff-in-disc regression loop: using 2005 and 2013
foreach yvar of varlist ec_employees ec_firms {
	foreach bw in 100 150 {
		foreach fe in 1 2 {
			
			if `fe'==1 {
				local fes = "pca01_id year#stdt"
			}
			else if `fe'==2 {
				local fes = "pca01_id year#st_code"
			}
			
			reghdfe `yvar' ///
				1.D#2013.year c.X#i.year c.DX#i.year ///
				if inrange(tot_p01,300-`bw',300+`bw') & inlist(year,2005,2013) & ///
				pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
				
				replace ec_years = "2005-2013" in `row'
				replace yvar = "`yvar'" in `row'
				replace fes = "`fes'" in `row'
				replace vce = "`vce'" in `row'
				replace bw = `bw' in `row'
				replace beta13 = _b[1.D#2013.year] in `row'
				replace se13 = _se[1.D#2013.year] in `row'	
				replace tscore13 = beta13/se13 in `row'
				replace pvalue13 = 2*ttail(e(df_r),abs(_b[1.D#2013.year]/_se[1.D#2013.year])) in `row'
				replace ci95_lo13 = beta13 - se13*invttail(e(df_r),0.025) in `row'
				replace ci95_hi13 = beta13 + se13*invttail(e(df_r),0.025) in `row'
				sum `yvar' if e(sample)							
				replace ymean = r(mean)	in `row'
				replace nobs = e(N)	in `row'
				replace nclust = e(N_clust) in `row'
				replace rmse = e(rmse) in `row'
				local row = `row' + 1
				
		}
	}
}

	// Diff-in-disc regression loop: using 1998, 2005, and 2013
foreach yvar of varlist ec_employees ec_firms {
	foreach bw in 100 150 {
		foreach fe in 1 2 {
			
			if `fe'==1 {
				local fes = "pca01_id year#stdt"
			}
			else if `fe'==2 {
				local fes = "pca01_id year#st_code"
			}
			
			reghdfe `yvar' ///
				1.D#1998.year 1.D#2013.year c.X#i.year c.DX#i.year ///
				if inrange(tot_p01,300-`bw',300+`bw') & inlist(year,1998,2005,2013) & ///
				pop_mismatch20==0 & lights_diff<20 [aw=aw`bw'], a(`fes') vce(`vce') 
				
				replace ec_years = "1998-2013" in `row'
				replace yvar = "`yvar'" in `row'
				replace fes = "`fes'" in `row'
				replace vce = "`vce'" in `row'
				replace bw = `bw' in `row'
				replace beta13 = _b[1.D#2013.year] in `row'
				replace se13 = _se[1.D#2013.year] in `row'	
				replace tscore13 = beta13/se13 in `row'
				replace pvalue13 = 2*ttail(e(df_r),abs(_b[1.D#2013.year]/_se[1.D#2013.year])) in `row'
				replace ci95_lo13 = beta13 - se13*invttail(e(df_r),0.025) in `row'
				replace ci95_hi13 = beta13 + se13*invttail(e(df_r),0.025) in `row'
				replace beta98 = _b[1.D#1998.year] in `row'
				replace se98 = _se[1.D#1998.year] in `row'	
				replace tscore98 = beta98/se98 in `row'
				replace pvalue98 = 2*ttail(e(df_r),abs(_b[1.D#1998.year]/_se[1.D#1998.year])) in `row'
				replace ci95_lo98 = beta98 - se98*invttail(e(df_r),0.025) in `row'
				replace ci95_hi98 = beta98 + se98*invttail(e(df_r),0.025) in `row'
				sum `yvar' if e(sample)							
				replace ymean = r(mean)	in `row'
				replace nobs = e(N)	in `row'
				replace nclust = e(N_clust) in `row'
				replace rmse = e(rmse) in `row'
				local row = `row' + 1
				
		}
	}
}


keep ec_year-rmse
dropmiss, obs force
compress
save "$results/diff_in_disc_outcomes_ec.dta", replace
	
}

********************************************************************************
********************************************************************************

** 2. DD pooled, and split by population bin (EC 1990, 1998, 2005, 2013)
{

use "$panel/panel_dataset_dd.dta", clear

	// Merge in economic census panel
preserve
use "$panel/ec_shrid_pc01_panel.dta", clear	
tab count_dup_vill_ec, missing
drop if count_dup_vill_ec==1
tab count_dup_shrid, missing
drop if count_dup_shrid!=0
tab flag_ec
drop if flag_ec==1
keep pca01_id ec_*
rename ec_year year
tempfile ec
save `ec'
restore
merge 1:1 pca01_id year using `ec'

	// Drop EC villages that never merge
egen temp = min(_merge), by(pca01_id)
keep if temp==1	
tab year
drop temp

	// Populate variables for regression
foreach v of varlist tot_p01 stdt st_code dt_code sample_1011 vplan4 {
	egen temp = mode(`v'), by(pca01_id)
	replace `v' = temp if `v'==.
	drop temp
}
drop if year==2001 | year==2011
dropmiss, force
gen treat_x_post = vplan4<11 & year>2005

	// Bring in NSS expenditure trend variables
preserve
use "$panel/panel_dataset_dd_nss.dta", clear
keep st_code dt_code exp05*
duplicates drop
tempfile nss
save `nss'
restore
merge m:1 st_code dt_code using `nss', nogen keep(1 3)
	
	// Bring in 2001 Census
preserve
use "$panel/panel_dataset_no_lights.dta", clear
keep pca01_id lit_p_01 work_p_01 work_m_01 work_f_01 vd_geo_a_01 vd_geo_a_irr_01 vd_geo_k_town_01 ///
	vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 ///
	vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01 vd_pwr_d_any_01 pct_work_main_01
unique pca01_id
assert r(unique)==r(N)
tempfile vars2001
save `vars2001'	
restore
merge m:1 pca01_id using `vars2001', nogen keep(1 3)

	
	// Create 500-person population bins (switching from 300 to 500 to align with NSS population medians)
cap drop *popbin* *popBIN*
gen popbin = 0
local bin = 500
local bmax = 3000
local bmax2 = 2900	
forvalues i = 0(`bin')`bmax2' {
  local iplus1 = `i' + `bin'
  gen popbin_`i'_`iplus1' = tot_p01>`i' & tot_p01<=`iplus1'
  la var popbin_`i'_`iplus1' "2001 population bin: (`i',`iplus1']"
  replace popbin = `i' if popbin_`i'_`iplus1'==1
}
gen popbin_`bmax'_max = tot_p01>`bmax'
replace popbin = `bmax' if popbin_`bmax'_max==1
la var popbin "Lower end of 2001 population bin"

	// DD population bins
forvalues i = 0(`bin')`bmax2' {
  local iplus1 = `i' + `bin'
  gen dd_x_popbin_`i'_`iplus1' = treat_x_post * popbin_`i'_`iplus1'
  la var dd_x_popbin_`i'_`iplus1' "DD treat x popbin: `i'-`iplus1'"
}
gen dd_x_popbin_`bmax'_max = treat_x_post * popbin_`bmax'_max
la var dd_x_popbin_`bmax'_max "DD treat x popbin `bmax' to infinity"


	// Create larger 1000-person population bins
gen popBIN = 0
local BIN = 1000
local bmax = 3000
local bmax2 = 2900	
forvalues i = 0(`BIN')`bmax2' {
  local iplus1 = `i' + `BIN'
  gen popBIN_`i'_`iplus1' = tot_p01>`i' & tot_p01<=`iplus1'
  la var popBIN_`i'_`iplus1' "2001 population bin: (`i',`iplus1']"
  replace popBIN = `i' if popBIN_`i'_`iplus1'==1
}
gen popBIN_`bmax'_max = tot_p01>`bmax'
replace popBIN = `bmax' if popBIN_`bmax'_max==1
la var popBIN "Lower end of 2001 population bin"

	// DD population bins (larger)
forvalues i = 0(`BIN')`bmax2' {
  local iplus1 = `i' + `BIN'
  gen dd_x_popBIN_`i'_`iplus1' = treat_x_post * popBIN_`i'_`iplus1'
  la var dd_x_popBIN_`i'_`iplus1' "DD treat x popbin: `i'-`iplus1'"
}
gen dd_x_popBIN_`bmax'_max = treat_x_post * popBIN_`bmax'_max
la var dd_x_popBIN_`bmax'_max "DD treat x popbin `bmax' to infinity"


	// REGRESSION LOOP
cap drop yvar-rmse
cap drop row_id
gen yvar = ""
gen dd_model = ""
gen fes = ""
gen ifs = ""
gen beta = .
gen se = .
gen tscore = .
gen pvalue = .
gen ci95_lo = .
gen ci95_hi = .
forvalues p = 0(`bin')`bmax' {
	local p1 = `p'+`bin'
	if `p'==`bmax' {
		local p1 = "max"
	}
	gen beta_`p'_`p1' = .
	gen se_`p'_`p1' = .
	gen tscore_`p'_`p1' = .
	gen pvalue_`p'_`p1' = .
	gen ci95_lo_`p'_`p1' = .
	gen ci95_hi_`p'_`p1' = .
	gen ymean_`p'_`p1' = .
	gen ymean13_`p'_`p1' = .
}
forvalues p = 0(`BIN')`bmax' {
	local p1 = `p'+`BIN'
	if `p'==`bmax' {
		local p1 = "max"
	}
	gen beta_big_`p'_`p1' = .
	gen se_big_`p'_`p1' = .
	gen tscore_big_`p'_`p1' = .
	gen pvalue_big_`p'_`p1' = .
	gen ci95_lo_big_`p'_`p1' = .
	gen ci95_hi_big_`p'_`p1' = .
	gen ymean_big_`p'_`p1' = .
	gen ymean13_big_`p'_`p1' = .
}
gen ymean = .
gen nobs = .
gen nclust = .
gen rmse = .
gen row_id = .

local row = 1


foreach dd_sample in 1 2 3 4 {
	
	if `dd_sample'==1 {
		local ifs = ""
	}
	else if `dd_sample'==2 {
		local ifs = " if sample_1011==1"
	}
	else if `dd_sample'==3 {
		local ifs = " if year>1990"
	}
	else if `dd_sample'==4 {
		local ifs = " if year>1990 & sample_1011==1"
	}
		
	foreach yvar of varlist ec_employees ec_firms {

		foreach FEs in 1 2 3 4 5 {
			
			if `FEs'==1 {
				local fes = "pca01_id year "
			}
			else if `FEs'==2 {
				local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year"
			}
			else if `FEs'==3 {
				local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year"
			}
			else if `FEs'==4 {
				local fes = "pca01_id year exp05_st_4ile#c.year exp05_ntl_10ile#c.year st_code#c.year c.(lit_p_01 work_p_01 work_f_01 vd_geo_a_01 vd_geo_k_town_01 vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 vd_pwr_d_any_01 vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01)#year"
			}
			else if `FEs'==5 {
				local fes = "pca01_id year exp05_st_4ile#year exp05_ntl_10ile#year st_code#c.year c.(lit_p_01 work_p_01 work_f_01 vd_geo_a_01 vd_geo_k_town_01 vd_edu_d_01 vd_hea_d_medi_01 vd_wat_d_any_01 vd_tra_d_bus_01 vdpr_tra_d_app_pr_01 vd_pwr_d_all_01 vd_pwr_d_any_01 vdpr_pwr_d_oth_01 vd_pwr_d_agr_01 vd_pwr_d_dom_01 vd_fin_d_ac_soc_01)#year"
			}
						
			*pooled DD
			local dd_model = "pooled"
			reghdfe `yvar' treat_x_post `ifs', absorb(`fes') vce(cluster stdt)
			qui replace yvar = "`yvar'"	if _n==`row'
			qui replace dd_model = "`dd_model'" if _n==`row'
			qui replace fes = "`fes'" if _n==`row'
			qui replace ifs = "`ifs'" if _n==`row'
			qui replace beta = _b[treat_x_post] if _n==`row'
			qui replace se = _se[treat_x_post] if _n==`row'
			qui replace tscore = beta/se if _n==`row'
			qui replace pvalue = 2*ttail(e(df_r),abs(_b[treat_x_post]/_se[treat_x_post])) if _n==`row'
			qui replace ci95_lo = beta - se*invttail(e(df_r),0.025) if _n==`row'
			qui replace ci95_hi = beta + se*invttail(e(df_r),0.025) if _n==`row'
			qui sum `yvar' if e(sample)	
			qui replace ymean = r(mean)	if _n==`row'
			qui replace nobs = e(N)	if _n==`row'
			qui replace nclust = e(N_clust)	if _n==`row'
			qui replace rmse = e(rmse) if _n==`row'
			qui replace row_id	= `row'	if _n==`row'
			qui local row = `row' + 1
			
			* DD w/ pop bins
			local dd_model = "pop bins"
			local fes = subinstr("`fes'"," year ", " year#popbin ",1)
			reghdfe `yvar' dd_x_popbin_*  `ifs', absorb(`fes') vce(cluster stdt)
			qui replace yvar = "`yvar'"	if _n==`row'
			qui replace dd_model = "`dd_model'" if _n==`row'
			qui replace fes = "`fes'" if _n==`row'
			qui replace ifs = "`ifs'" if _n==`row'
			forvalues p = 0(`bin')`bmax' {	
				local p1 = `p'+`bin'
				if `p'==`bmax' {
					local p1 = "max"
				}
				qui replace beta_`p'_`p1' = _b[dd_x_popbin_`p'_`p1'] if _n==`row'
				qui replace se_`p'_`p1' = _se[dd_x_popbin_`p'_`p1'] if _n==`row'
				qui replace tscore_`p'_`p1' = beta_`p'_`p1'/se_`p'_`p1' if _n==`row'
				qui replace pvalue_`p'_`p1' = 2*ttail(e(df_r),abs(_b[dd_x_popbin_`p'_`p1']/_se[dd_x_popbin_`p'_`p1'])) if _n==`row'
				qui replace ci95_lo_`p'_`p1' = beta_`p'_`p1' - se_`p'_`p1'*invttail(e(df_r),0.025) if _n==`row'
				qui replace ci95_hi_`p'_`p1' = beta_`p'_`p1' + se_`p'_`p1'*invttail(e(df_r),0.025) if _n==`row'
				qui sum `yvar' if e(sample) & popbin_`p'_`p1'==1
				qui replace ymean_`p'_`p1' = r(mean) if _n==`row'
				qui sum `yvar' if e(sample) & popbin_`p'_`p1'==1 & year==2013
				qui replace ymean13_`p'_`p1' = r(mean) if _n==`row'
			}
			qui sum `yvar' if e(sample)							
			qui replace ymean = r(mean)	if _n==`row'
			qui replace nobs = e(N)	if _n==`row'
			qui replace nclust = e(N_clust)	if _n==`row'
			qui replace rmse = e(rmse)	if _n==`row'
			qui replace row_id	= `row'	if _n==`row'
			qui local row = `row' + 1

			* DD w/ bigger pop bins 
			local dd_model = "bigger pop bins"
			local fes = subinstr("`fes'"," year#popbin ", " year#popBIN ",1)
			reghdfe `yvar' dd_x_popBIN_*  `ifs', absorb(`fes') vce(cluster stdt)
			qui replace yvar = "`yvar'"	if _n==`row'
			qui replace dd_model = "`dd_model'" if _n==`row'
			qui replace fes = "`fes'" if _n==`row'
			qui replace ifs = "`ifs'" if _n==`row'
			forvalues p = 0(`BIN')`bmax' {	
				local p1 = `p'+`BIN'
				if `p'==`bmax' {
					local p1 = "max"
				}
				qui replace beta_big_`p'_`p1' = _b[dd_x_popBIN_`p'_`p1'] if _n==`row'
				qui replace se_big_`p'_`p1' = _se[dd_x_popBIN_`p'_`p1'] if _n==`row'
				qui replace tscore_big_`p'_`p1' = beta_big_`p'_`p1'/se_big_`p'_`p1' if _n==`row'
				qui replace pvalue_big_`p'_`p1' = 2*ttail(e(df_r),abs(_b[dd_x_popBIN_`p'_`p1']/_se[dd_x_popBIN_`p'_`p1'])) if _n==`row'
				qui replace ci95_lo_big_`p'_`p1' = beta_big_`p'_`p1' - se_big_`p'_`p1'*invttail(e(df_r),0.025) if _n==`row'
				qui replace ci95_hi_big_`p'_`p1' = beta_big_`p'_`p1' + se_big_`p'_`p1'*invttail(e(df_r),0.025) if _n==`row'
				qui sum `yvar' if e(sample) & popBIN_`p'_`p1'==1
				qui replace ymean_big_`p'_`p1' = r(mean) if _n==`row'
				qui sum `yvar' if e(sample) & popBIN_`p'_`p1'==1 & year==2013
				qui replace ymean13_big_`p'_`p1' = r(mean) if _n==`row'
			}
			qui sum `yvar' if e(sample)							
			qui replace ymean = r(mean)	if _n==`row'
			qui replace nobs = e(N)	if _n==`row'
			qui replace nclust = e(N_clust)	if _n==`row'
			qui replace rmse = e(rmse)	if _n==`row'
			qui replace row_id	= `row'	if _n==`row'
			qui local row = `row' + 1
			
		}
	}
}

	// save results
keep yvar-row_id
keep if yvar!=""
compress
saveold "$results/dd_results_outcomes_ec_4years.dta", replace

}

********************************************************************************
********************************************************************************

